5.CNN-Image-Classification

第五课 CNN图像分类

褚则伟 zeweichu@gmail.com

参考资料

1
2
3
4
5
6
7
# Core dependencies for the CNN classification examples below.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
print("PyTorch Version: ",torch.__version__)
PyTorch Version:  1.0.0

首先我们定义一个基于ConvNet的简单神经网络

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
class Net(nn.Module):
    """Simple CNN for 28x28 single-channel images (MNIST-style), 10 classes.

    forward() returns log-probabilities (log_softmax), so the training loss
    must be F.nll_loss (not CrossEntropyLoss, which applies log_softmax itself).
    """

    def __init__(self):
        super(Net, self).__init__()
        # 1 x 28 x 28 -> 20 x 24 x 24   (28 - 5 + 1 = 24, stride 1, no padding)
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        # 20 x 12 x 12 -> 50 x 8 x 8    (12 - 5 + 1 = 8)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        # After the second 2x2 max-pool the feature map is 50 x 4 x 4.
        self.fc1 = nn.Linear(4 * 4 * 50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        # x: batch x 1 x 28 x 28
        x = F.relu(self.conv1(x))    # -> batch x 20 x 24 x 24
        x = F.max_pool2d(x, 2, 2)    # -> batch x 20 x 12 x 12
        x = F.relu(self.conv2(x))    # -> batch x 50 x 8 x 8
        x = F.max_pool2d(x, 2, 2)    # -> batch x 50 x 4 x 4
        x = x.view(-1, 4 * 4 * 50)   # flatten to batch x 800
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)  # log-probabilities
1
2
3
4
5
# Load the raw MNIST training set (ToTensor only, no normalization yet),
# so we can compute the dataset mean/std used for Normalize later.
mnist_data = datasets.MNIST(
    "./mnist_data", train=True, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
    ]))
mnist_data
<torchvision.datasets.mnist.MNIST at 0x7fa362b9c7b8>
1
# Compute per-pixel statistics over the raw training images; these values
# are the (mean, std) passed to transforms.Normalize in the dataloaders below.
data = [d[0].data.cpu().numpy() for d in mnist_data]
np.mean(data)             # ~0.13066062
np.std(data)              # ~0.30810776
mnist_data[223][0].shape  # torch.Size([1, 28, 28])
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch over train_loader.

    The model must output log-probabilities (e.g. end in log_softmax),
    since the loss here is F.nll_loss. Loss is printed every 100 batches.
    """
    model.train()
    for idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        pred = model(data)  # batch_size x 10 log-probabilities
        loss = F.nll_loss(pred, target)

        # SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if idx % 100 == 0:
            print("Train Epoch: {}, iteration: {}, Loss: {}".format(
                epoch, idx, loss.item()))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
def test(model, device, test_loader):
    """Evaluate the model on test_loader; print average loss and accuracy (%).

    Runs under torch.no_grad(); expects the model to output log-probabilities
    (paired with F.nll_loss, matching train() above).
    """
    model.eval()
    total_loss = 0.
    correct = 0.
    with torch.no_grad():
        for idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)

            output = model(data)  # batch_size x 10 log-probabilities
            # Sum (not mean) per batch so dividing by the dataset size below
            # gives the true per-example average loss.
            total_loss += F.nll_loss(output, target, reduction="sum").item()
            pred = output.argmax(dim=1)  # predicted class per example
            correct += pred.eq(target.view_as(pred)).sum().item()

    total_loss /= len(test_loader.dataset)
    acc = correct / len(test_loader.dataset) * 100.
    print("Test loss: {}, Accuracy: {}".format(total_loss, acc))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 32
# Normalize with the dataset mean/std computed above (~0.1307, ~0.3081).
train_dataloader = torch.utils.data.DataLoader(
    datasets.MNIST("./mnist_data", train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True,
    num_workers=1, pin_memory=True
)
# NOTE(review): shuffle=True on the test loader is harmless but unnecessary —
# evaluation does not depend on example order.
test_dataloader = torch.utils.data.DataLoader(
    datasets.MNIST("./mnist_data", train=False, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True,
    num_workers=1, pin_memory=True
)

lr = 0.01
momentum = 0.5
model = Net().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

num_epochs = 2
for epoch in range(num_epochs):
    train(model, device, train_dataloader, optimizer, epoch)
    test(model, device, test_dataloader)

torch.save(model.state_dict(), "mnist_cnn.pt")
Train Epoch: 0, iteration: 0, Loss: 2.283817768096924
Train Epoch: 0, iteration: 100, Loss: 0.6110288500785828
Train Epoch: 0, iteration: 200, Loss: 0.18155980110168457
Train Epoch: 0, iteration: 300, Loss: 0.31043028831481934
Train Epoch: 0, iteration: 400, Loss: 0.518582284450531
Train Epoch: 0, iteration: 500, Loss: 0.1202855259180069
Train Epoch: 0, iteration: 600, Loss: 0.0989612340927124
Train Epoch: 0, iteration: 700, Loss: 0.09637182205915451
Train Epoch: 0, iteration: 800, Loss: 0.13470694422721863
Train Epoch: 0, iteration: 900, Loss: 0.06548292934894562
Train Epoch: 0, iteration: 1000, Loss: 0.03107370436191559
Train Epoch: 0, iteration: 1100, Loss: 0.03948028385639191
Train Epoch: 0, iteration: 1200, Loss: 0.09810394793748856
Train Epoch: 0, iteration: 1300, Loss: 0.15199752151966095
Train Epoch: 0, iteration: 1400, Loss: 0.016710489988327026
Train Epoch: 0, iteration: 1500, Loss: 0.005827277898788452
Train Epoch: 0, iteration: 1600, Loss: 0.0754864513874054
Train Epoch: 0, iteration: 1700, Loss: 0.012112855911254883
Train Epoch: 0, iteration: 1800, Loss: 0.03425520658493042
Test loss: 0.07333157858848571, Accuracy: 97.71
Train Epoch: 1, iteration: 0, Loss: 0.07740284502506256
Train Epoch: 1, iteration: 100, Loss: 0.018157958984375
Train Epoch: 1, iteration: 200, Loss: 0.006041824817657471
Train Epoch: 1, iteration: 300, Loss: 0.1392734944820404
Train Epoch: 1, iteration: 400, Loss: 0.022600188851356506
Train Epoch: 1, iteration: 500, Loss: 0.020594105124473572
Train Epoch: 1, iteration: 600, Loss: 0.031451016664505005
Train Epoch: 1, iteration: 700, Loss: 0.09078143537044525
Train Epoch: 1, iteration: 800, Loss: 0.013186424970626831
Train Epoch: 1, iteration: 900, Loss: 0.04006651043891907
Train Epoch: 1, iteration: 1000, Loss: 0.014285147190093994
Train Epoch: 1, iteration: 1100, Loss: 0.22637280821800232
Train Epoch: 1, iteration: 1200, Loss: 0.02185329794883728
Train Epoch: 1, iteration: 1300, Loss: 0.13519427180290222
Train Epoch: 1, iteration: 1400, Loss: 0.021606311202049255
Train Epoch: 1, iteration: 1500, Loss: 0.016718149185180664
Train Epoch: 1, iteration: 1600, Loss: 0.07150381058454514
Train Epoch: 1, iteration: 1700, Loss: 0.041178762912750244
Train Epoch: 1, iteration: 1800, Loss: 0.004264324903488159
Test loss: 0.040256525754928586, Accuracy: 98.61

NLL loss的定义

$\ell(x, y) = L = \{l_1, \dots, l_N\}^\top, \quad
l_n = -w_{y_n} \, x_{n, y_n}, \quad
w_c = \text{weight}[c] \cdot \mathbb{1}\{c \neq \text{ignore\_index}\}$

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Same pipeline as the MNIST run above, applied to FashionMNIST.
# NOTE(review): the Normalize stats (0.1307, 0.3081) were computed on MNIST,
# not FashionMNIST — close enough for a demo, but worth recomputing.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 32
train_dataloader = torch.utils.data.DataLoader(
    datasets.FashionMNIST("./fashion_mnist_data", train=True, download=True,
                          transform=transforms.Compose([
                              transforms.ToTensor(),
                              transforms.Normalize((0.1307,), (0.3081,))
                          ])),
    batch_size=batch_size, shuffle=True,
    num_workers=1, pin_memory=True
)
test_dataloader = torch.utils.data.DataLoader(
    datasets.FashionMNIST("./fashion_mnist_data", train=False, download=True,
                          transform=transforms.Compose([
                              transforms.ToTensor(),
                              transforms.Normalize((0.1307,), (0.3081,))
                          ])),
    batch_size=batch_size, shuffle=True,
    num_workers=1, pin_memory=True
)

lr = 0.01
momentum = 0.5
model = Net().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

num_epochs = 2
for epoch in range(num_epochs):
    train(model, device, train_dataloader, optimizer, epoch)
    test(model, device, test_dataloader)

torch.save(model.state_dict(), "fashion_mnist_cnn.pt")
Train Epoch: 0, iteration: 0, Loss: 2.2915596961975098
Train Epoch: 0, iteration: 100, Loss: 1.0237065553665161
Train Epoch: 0, iteration: 200, Loss: 0.840910017490387
Train Epoch: 0, iteration: 300, Loss: 0.7526986598968506
Train Epoch: 0, iteration: 400, Loss: 0.9580956697463989
Train Epoch: 0, iteration: 500, Loss: 0.6261149644851685
Train Epoch: 0, iteration: 600, Loss: 0.4255485534667969
Train Epoch: 0, iteration: 700, Loss: 0.4818880558013916
Train Epoch: 0, iteration: 800, Loss: 0.731956958770752
Train Epoch: 0, iteration: 900, Loss: 0.45393142104148865
Train Epoch: 0, iteration: 1000, Loss: 0.7139236927032471
Train Epoch: 0, iteration: 1100, Loss: 0.4227047562599182
Train Epoch: 0, iteration: 1200, Loss: 0.23375816643238068
Train Epoch: 0, iteration: 1300, Loss: 0.4680781960487366
Train Epoch: 0, iteration: 1400, Loss: 0.352077841758728
Train Epoch: 0, iteration: 1500, Loss: 0.36358141899108887
Train Epoch: 0, iteration: 1600, Loss: 0.46214842796325684
Train Epoch: 0, iteration: 1700, Loss: 0.4750059247016907
Train Epoch: 0, iteration: 1800, Loss: 0.4483456015586853
Test loss: 0.45549455399513245, Accuracy: 83.48
Train Epoch: 1, iteration: 0, Loss: 0.46502870321273804
Train Epoch: 1, iteration: 100, Loss: 0.4504859745502472
Train Epoch: 1, iteration: 200, Loss: 0.5228638648986816
Train Epoch: 1, iteration: 300, Loss: 0.507514476776123
Train Epoch: 1, iteration: 400, Loss: 0.33425623178482056
Train Epoch: 1, iteration: 500, Loss: 0.15890713036060333
Train Epoch: 1, iteration: 600, Loss: 0.4329398274421692
Train Epoch: 1, iteration: 700, Loss: 0.47604358196258545
Train Epoch: 1, iteration: 800, Loss: 0.40596315264701843
Train Epoch: 1, iteration: 900, Loss: 0.31725335121154785
Train Epoch: 1, iteration: 1000, Loss: 0.5835919380187988
Train Epoch: 1, iteration: 1100, Loss: 0.3334502577781677
Train Epoch: 1, iteration: 1200, Loss: 0.3043973743915558
Train Epoch: 1, iteration: 1300, Loss: 0.3891294002532959
Train Epoch: 1, iteration: 1400, Loss: 0.20209042727947235
Train Epoch: 1, iteration: 1500, Loss: 0.26769235730171204
Train Epoch: 1, iteration: 1600, Loss: 0.366751104593277
Train Epoch: 1, iteration: 1700, Loss: 0.16336065530776978
Train Epoch: 1, iteration: 1800, Loss: 0.48901161551475525
Test loss: 0.37672775785923, Accuracy: 86.15

CNN模型的迁移学习

  • 很多时候当我们需要训练一个新的图像分类任务,我们不会完全从一个随机的模型开始训练,而是利用_预训练_的模型来加速训练的过程。我们经常使用在ImageNet上的预训练模型。
  • 这是一种transfer learning的方法。我们常用以下两种方法做迁移学习。
    • fine tuning: 从一个预训练模型开始,我们改变一些模型的架构,然后继续训练整个模型的参数。
    • feature extraction: 我们不再改变预训练模型的参数,而是只更新我们改变过的部分模型参数。我们之所以叫它feature extraction是因为我们把预训练的CNN模型当做一个特征提取模型,利用提取出来的特征来完成我们的训练任务。

以下是构建和训练迁移学习模型的基本步骤:

  • 初始化预训练模型
  • 把最后一层的输出层改变成我们想要分的类别总数
  • 定义一个optimizer来更新参数
  • 模型训练
1
2
3
4
5
6
7
8
9
# Extra dependencies for the transfer-learning section.
import numpy as np
import torchvision
from torchvision import datasets, transforms, models

import matplotlib.pyplot as plt
import time
import os
import copy
print("Torchvision Version: ",torchvision.__version__)
Torchvision Version:  0.2.0

数据

我们会使用hymenoptera_data数据集,下载.

这个数据集包括两类图片, bees 和 ants, 这些数据都被处理成了可以使用ImageFolder <https://pytorch.org/docs/stable/torchvision/datasets.html#torchvision.datasets.ImageFolder>来读取的格式。我们只需要把data_dir设置成数据的根目录,然后把model_name设置成我们想要使用的预训练模型:
::
[resnet, alexnet, vgg, squeezenet, densenet, inception]

其他的参数有:

  • num_classes表示数据集分类的类别数
  • batch_size
  • num_epochs
  • feature_extract表示我们训练的时候使用fine tuning还是feature extraction方法。如果feature_extract = False,整个模型都会被同时更新。如果feature_extract = True,只有模型的最后一层被更新。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Top level data directory. Here we assume the format of the directory conforms
# to the ImageFolder structure
data_dir = "./hymenoptera_data"
# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
# (only "resnet" is implemented in initialize_model below)
model_name = "resnet"
# Number of classes in the dataset
num_classes = 2
# Batch size for training (change depending on how much memory you have)
batch_size = 32
# Number of epochs to train for
num_epochs = 15
# Flag for feature extracting. When False, we finetune the whole model,
# when True we only update the reshaped layer params
feature_extract = True

# Spatial input size expected by resnet18 (224 x 224)
input_size = 224

读入数据

现在我们知道了模型输入的size,我们就可以把数据预处理成相应的格式。

1
2
3
4
5
6
# Quick look at the training images with augmentation only (no normalization);
# the real training/validation transforms are defined in the next cell.
all_imgs = datasets.ImageFolder(
    os.path.join(data_dir, "train"),
    transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]))
loader = torch.utils.data.DataLoader(all_imgs, batch_size=batch_size, shuffle=True, num_workers=4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# train: random crop + horizontal flip for augmentation;
# val:   deterministic resize + center crop.
# Normalization uses the ImageNet channel means/stds expected by the
# pretrained torchvision models.
data_transforms = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    "val": transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
                  for x in ["train", "val"]}

dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x],
                                                   batch_size=batch_size, shuffle=True,
                                                   num_workers=4)
                    for x in ["train", "val"]}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1
img = next(iter(dataloaders_dict["val"]))[0]
1
img.shape
torch.Size([32, 3, 224, 224])
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
unloader = transforms.ToPILImage()  # reconvert a tensor into a PIL image

plt.ion()

def imshow(tensor, title=None):
    """Display an image tensor (C x H x W, or 1 x C x H x W) with matplotlib."""
    image = tensor.cpu().clone()  # clone so we do not modify the caller's tensor
    image = image.squeeze(0)      # drop a leading batch dimension if present
    image = unloader(image)
    plt.imshow(image)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


plt.figure()
imshow(img[11], title='Image')

png

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
def set_parameter_requires_grad(model, feature_extract):
    """Freeze all of model's parameters when feature_extract is True.

    Used for the feature-extraction flavour of transfer learning: frozen
    parameters receive no gradients, so only layers added afterwards
    (e.g. a replaced fc head) get trained. No-op when feature_extract is False.
    """
    if feature_extract:
        for param in model.parameters():
            param.requires_grad = False

def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a model for transfer learning.

    Returns (model, input_size). Only "resnet" (resnet18) is implemented;
    any other model_name prints a message and returns (None, None).
    """
    if model_name == "resnet":
        model_ft = models.resnet18(pretrained=use_pretrained)
        # Freeze the backbone first (when feature_extract=True) ...
        set_parameter_requires_grad(model_ft, feature_extract)
        # ... then replace the classifier head; the new fc layer's parameters
        # are created afterwards, so they keep requires_grad=True.
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224  # resnet18 expects 224 x 224 inputs
    else:
        print("model not implemented")
        return None, None

    return model_ft, input_size

# Instantiate the feature-extraction model (backbone frozen, new 2-class head).
model_ft, input_size = initialize_model(model_name,
                                        num_classes, feature_extract, use_pretrained=True)
print(model_ft)
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer3): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (avgpool): AvgPool2d(kernel_size=7, stride=1, padding=0)
  (fc): Linear(in_features=512, out_features=2, bias=True)
)
1
model_ft.layer1[0].conv1.weight.requires_grad
False
1
model_ft.fc.weight.requires_grad
True
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def train_model(model, dataloaders, loss_fn, optimizer, num_epochs=5):
    """Train with per-epoch validation; return (best model, val accuracy history).

    dataloaders is a dict with "train" and "val" DataLoaders. The weights with
    the best validation accuracy are restored into model before returning.
    NOTE(review): relies on a module-level `device` defined elsewhere in the file.
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.
    val_acc_history = []
    for epoch in range(num_epochs):
        for phase in ["train", "val"]:
            running_loss = 0.
            running_corrects = 0.
            if phase == "train":
                model.train()
            else:
                model.eval()

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)

                # Build the autograd graph only during the training phase.
                with torch.autograd.set_grad_enabled(phase == "train"):
                    outputs = model(inputs)  # bsize x num_classes
                    loss = loss_fn(outputs, labels)

                preds = outputs.argmax(dim=1)
                if phase == "train":
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                # Weight by batch size so the epoch average below is exact.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds.view(-1) == labels.view(-1)).item()

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects / len(dataloaders[phase].dataset)

            print("Phase {} loss: {}, acc: {}".format(phase, epoch_loss, epoch_acc))

            if phase == "val" and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == "val":
                val_acc_history.append(epoch_acc)
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

模型训练

1
model_ft = model_ft.to(device)

# Optimize only parameters with requires_grad=True: just the new fc head
# when feature_extract=True, or everything when fine-tuning.
optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                   model_ft.parameters()), lr=0.001, momentum=0.9)
# CrossEntropyLoss applies log_softmax internally, so the resnet head
# outputs raw logits (unlike the Net/nll_loss pairing earlier).
loss_fn = nn.CrossEntropyLoss()

_, ohist = train_model(model_ft, dataloaders_dict, loss_fn, optimizer, num_epochs=num_epochs)
Phase train loss: 0.2441009590860273, acc: 0.9139344262295082
Phase val loss: 0.2058036023495244, acc: 0.9477124183006536
Phase train loss: 0.2242034280397853, acc: 0.9221311475409836
Phase val loss: 0.19933121480972937, acc: 0.9477124183006536
Phase train loss: 0.2179500304284643, acc: 0.930327868852459
Phase val loss: 0.19292019200480842, acc: 0.9477124183006536
Phase train loss: 0.2038783010889272, acc: 0.9221311475409836
Phase val loss: 0.2022019473750607, acc: 0.9281045751633987
Phase train loss: 0.20605210031642288, acc: 0.9180327868852459
Phase val loss: 0.18852663916700027, acc: 0.9477124183006536
Phase train loss: 0.1799576844348282, acc: 0.9426229508196722
Phase val loss: 0.18889451397010704, acc: 0.9477124183006536
Phase train loss: 0.16676783659419075, acc: 0.9426229508196722
Phase val loss: 0.1854035053280444, acc: 0.9477124183006536
Phase train loss: 0.20258395642530722, acc: 0.930327868852459
Phase val loss: 0.1881853450162738, acc: 0.934640522875817
Phase train loss: 0.17906492948532104, acc: 0.9180327868852459
Phase val loss: 0.17941297795258315, acc: 0.954248366013072
Phase train loss: 0.15364321333463074, acc: 0.9631147540983607
Phase val loss: 0.18958801722604465, acc: 0.9281045751633987
Phase train loss: 0.19896865452899307, acc: 0.9139344262295082
Phase val loss: 0.1826314626176373, acc: 0.954248366013072
Phase train loss: 0.16911793878821077, acc: 0.9344262295081968
Phase val loss: 0.18108942452209448, acc: 0.9477124183006536
Phase train loss: 0.16306845306373033, acc: 0.9467213114754098
Phase val loss: 0.1891336505806524, acc: 0.9281045751633987
Phase train loss: 0.1875694076545903, acc: 0.9385245901639344
Phase val loss: 0.1793875343659345, acc: 0.9477124183006536
Phase train loss: 0.20147151096922453, acc: 0.9139344262295082
Phase val loss: 0.18119409422274507, acc: 0.9411764705882353
1
2
3
4
5
6
7
# Baseline: the same architecture trained from scratch (no pretrained weights,
# all parameters updated) for comparison with the transfer-learning run above.
model_scratch, _ = initialize_model(model_name,
                                    num_classes, feature_extract=False, use_pretrained=False)
model_scratch = model_scratch.to(device)
optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                   model_scratch.parameters()), lr=0.001, momentum=0.9)
loss_fn = nn.CrossEntropyLoss()
_, scratch_hist = train_model(model_scratch, dataloaders_dict, loss_fn, optimizer, num_epochs=num_epochs)
Phase train loss: 0.73858080437926, acc: 0.47950819672131145
Phase val loss: 0.704963364632301, acc: 0.46405228758169936
Phase train loss: 0.668987612255284, acc: 0.5614754098360656
Phase val loss: 0.6597700851415497, acc: 0.6339869281045751
Phase train loss: 0.6411691278707786, acc: 0.6557377049180327
Phase val loss: 0.6726375681902069, acc: 0.5751633986928104
Phase train loss: 0.6194371883986426, acc: 0.6352459016393442
Phase val loss: 0.6313814318257999, acc: 0.6274509803921569
Phase train loss: 0.6170555851498588, acc: 0.6475409836065574
Phase val loss: 0.6528662945709977, acc: 0.6274509803921569
Phase train loss: 0.6110719637792619, acc: 0.6762295081967213
Phase val loss: 0.626404657472972, acc: 0.6405228758169934
Phase train loss: 0.5864127718034338, acc: 0.639344262295082
Phase val loss: 0.6282203664966658, acc: 0.6405228758169934
Phase train loss: 0.5998562847981688, acc: 0.6680327868852459
Phase val loss: 0.6236733716297773, acc: 0.6274509803921569
Phase train loss: 0.5662176755608105, acc: 0.6885245901639344
Phase val loss: 0.5790788285872516, acc: 0.6862745098039216
Phase train loss: 0.5466401464626437, acc: 0.7131147540983607
Phase val loss: 0.5834652006236556, acc: 0.7124183006535948
Phase train loss: 0.5393341779708862, acc: 0.7295081967213115
Phase val loss: 0.5651591182534211, acc: 0.6797385620915033
Phase train loss: 0.5473490689621597, acc: 0.7172131147540983
Phase val loss: 0.5568503246587866, acc: 0.673202614379085
Phase train loss: 0.5429437048122531, acc: 0.7090163934426229
Phase val loss: 0.6801646998505188, acc: 0.6339869281045751
Phase train loss: 0.512938850238675, acc: 0.7254098360655737
Phase val loss: 0.6064363223275328, acc: 0.6862745098039216
Phase train loss: 0.5331279508403091, acc: 0.6885245901639344
Phase val loss: 0.5726334435098311, acc: 0.6928104575163399

我们来plot模型训练时候loss的变化

1
2
3
4
5
6
7
8
9
10
11
12
13
14

# Plot the training curves of validation accuracy vs. number
# of training epochs for the transfer learning method and
# the model trained from scratch

plt.title("Validation Accuracy vs. Number of Training Epochs")
plt.xlabel("Training Epochs")
plt.ylabel("Validation Accuracy")
plt.plot(range(1,num_epochs+1),ohist,label="Pretrained")
plt.plot(range(1,num_epochs+1),scratch_hist,label="Scratch")
plt.ylim((0,1.))
plt.xticks(np.arange(1, num_epochs+1, 1.0))
plt.legend()
plt.show()

png

课后学习